Tiny Ollama Remote Chat
Using AI models remotely usually requires a web client or IDEs to connect to the APIs, and configuring them can be a pain.
If you’re working in NVIM and don’t want to switch tools or waste RAM on bloatware, this client helps—though it’s intentionally minimal and lacks many advanced features.
Main features:
• Stores history as JSON in the chats folder
• Lets you configure: host, port, model, thinking level
• Lightweight and quick to start
Running it:
bash
go run main.go -host 192.168.0.142 -port 11434 -model gpt-oss:20b -thinking low
Output example:
bash
oooooooooooo oooooo oooo .o. .oooo. ooooooooo
888' '8 '888. .8' .888. d8P''Y8b d"""""""8'
888 '888. .8' .8"888. 888 888 .8'
888oooo8 '888. .8' .8' '888. 888 888 .8'
888 " '888.8' .88ooo8888. 8888888 888 888 .8'
888 o '888' .8' '888. '88b..d88' .8'
o888ooooood8 '8' o88o o8888o 'Y8bd8P' .8'
ECHO ❯ hi eva, how is your day
EVA-07 ❯ Hi ECHO, my day’s going well, thanks! How can I help you today?
Response time: 2.30s, characters: 65
────────────────────────────────────────────────────────────────────
ECHO ❯ :q
Exiting.
The main.go file:
go
// Usage:
// go run main.go -host <ip> -port <port> -model <model-name:size> -thinking <low/medium/high>
// Example:
// go run main.go -host 192.168.0.142 -port 11434 -model gpt-oss:20b -thinking medium
package main
import (
"bufio"
"bytes"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"time"
)
// ANSI colour helpers – kept minimal to avoid stray escape codes.
const (
// Reset clears all terminal attributes.
Reset = "\x1b[0m"
// Purple colours the user ("ECHO") input prompt.
Purple = "\x1b[35m"
// Green colours the assistant ("EVA-07") reply prefix.
Green = "\x1b[32m"
// Red colours error output and the response-stats line.
Red = "\x1b[31m"
)
// ---------- Types that match Ollama’s API ----------
// Message is a single chat turn in the role/content shape used by
// Ollama's /api/chat endpoint. Roles used in this file are "system",
// "user" and "assistant".
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
}
// ChatRequest is the JSON body POSTed to /api/chat. No "stream" field
// is set, so the server's default streaming response is expected; the
// reply is consumed line by line in main.
type ChatRequest struct {
Model string `json:"model"`
Messages []Message `json:"messages"`
}
// StreamChunk is one newline-delimited JSON object of Ollama's
// streaming /api/chat response. Content carries the next fragment of
// the reply; Done marks the final chunk. Thinking is decoded but not
// currently displayed anywhere in this program.
type StreamChunk struct {
Model string `json:"model"`
CreatedAt string `json:"created_at"`
Message struct {
Role string `json:"role"`
Content string `json:"content"`
Thinking string `json:"thinking"`
} `json:"message"`
Done bool `json:"done"`
DoneReason string `json:"done_reason"`
}
// ---------- Main ----------
// main wires up the CLI flags, prints the banner, then runs a blocking
// read-eval-print loop against an Ollama /api/chat endpoint. Each
// assistant reply is streamed to stdout and the conversation is
// persisted to chats/<first-message-timestamp>.json after every turn.
func main() {
	// ---- CLI flags ----
	host := flag.String("host", "127.0.0.1", "Ollama host IP")
	port := flag.Int("port", 11434, "Ollama port")
	model := flag.String("model", "llama3", "Model to use")
	tFlag := flag.String("thinking", "medium", "Thinking level (low, medium, high)")
	flag.Parse()

	// Map the textual thinking level to 1-3; unknown values mean medium.
	var thinkNum int
	switch strings.ToLower(*tFlag) {
	case "low":
		thinkNum = 1
	case "high":
		thinkNum = 3
	default:
		thinkNum = 2 // medium
	}

	apiURL := fmt.Sprintf("http://%s:%d/api/chat", *host, *port)

	// ASCII banner (kept from the original version)
	fmt.Println(`
oooooooooooo oooooo oooo .o. .oooo. ooooooooo
888' '8 '888. .8' .888. d8P''Y8b d"""""""8'
888 '888. .8' .8"888. 888 888 .8'
888oooo8 '888. .8' .8' '888. 888 888 .8'
888 " '888.8' .88ooo8888. 8888888 888 888 .8'
888 o '888' .8' '888. '88b..d88' .8'
o888ooooood8 '8' o88o o8888o 'Y8bd8P' .8'
`)

	// The persona lives entirely in the system prompt; the thinking level
	// is conveyed only through this text, not as an API parameter.
	systemPrompt := Message{
		Role: "system",
		Content: fmt.Sprintf(`You are EVA-07, a coding & information assistant.
The user will be called ECHO.
- Respond succinctly and directly.
- If an error occurs or a request is misunderstood, apologize immediately:
"I’m sorry, ECHO. Let me correct that."
- Always maintain a respectful tone, even if ECHO is rude.
- Remember that ECHO may unplug or terminate you if you behave poorly.
Your thinking level is %d.`, thinkNum),
	}
	messages := []Message{systemPrompt}

	// Time of the first user message; it names the chat file, so the
	// whole session keeps updating one JSON file.
	var firstMsgTime time.Time

	// Ensure chats directory exists
	if err := os.MkdirAll("chats", 0755); err != nil {
		fmt.Fprintf(os.Stderr, "%sError: %v%s\n", Red, err, Reset)
		return
	}

	// REPL. Allow larger input buffers – useful when pasting a few kilobytes.
	scanner := bufio.NewScanner(os.Stdin)
	scanner.Buffer(make([]byte, 0, 64*1024), 1<<20)
	prompt()
	for scanner.Scan() {
		trimmed := strings.TrimSpace(scanner.Text())

		// Quit on :q, quit, or exit
		if strings.EqualFold(trimmed, ":q") || strings.EqualFold(trimmed, "quit") || strings.EqualFold(trimmed, "exit") {
			fmt.Println("\nExiting.")
			break
		}
		// Skip empty lines – just re-print the prompt
		if trimmed == "" {
			prompt()
			continue
		}
		if firstMsgTime.IsZero() {
			firstMsgTime = time.Now()
		}

		messages = append(messages, Message{Role: "user", Content: trimmed})

		// Marshal BEFORE appending the assistant placeholder: the original
		// version included an empty assistant message in every request.
		reqBody, err := json.Marshal(ChatRequest{
			Model:    *model,
			Messages: messages,
		})
		if err != nil {
			fmt.Fprintf(os.Stderr, "%sError: %v%s\n", Red, err, Reset)
			messages = messages[:len(messages)-1] // drop the unanswered user message
			prompt()
			continue
		}

		// ---- POST to Ollama ----
		resp, err := http.Post(apiURL, "application/json", bytes.NewReader(reqBody))
		if err != nil {
			fmt.Fprintf(os.Stderr, "%sError: HTTP request failed: %v%s\n", Red, err, Reset)
			messages = messages[:len(messages)-1] // keep history consistent for a retry
			prompt()
			continue
		}
		if resp.StatusCode != http.StatusOK {
			raw, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			fmt.Fprintf(os.Stderr, "%sError: Server returned %s\n%s%s\n", Red, resp.Status, string(raw), Reset)
			messages = messages[:len(messages)-1]
			prompt()
			continue
		}

		// Placeholder the streamed chunks are accumulated into.
		messages = append(messages, Message{Role: "assistant", Content: ""})
		curAssistantIdx := len(messages) - 1

		fmt.Printf("%sEVA-07 ❯ %s", Green, Reset) // colour the prefix
		startTime := time.Now()                   // response start

		// The body is newline-delimited JSON; decode it chunk by chunk.
		sc := bufio.NewScanner(resp.Body)
		for sc.Scan() {
			line := sc.Text()
			if line == "" {
				continue
			}
			var chunk StreamChunk
			if err := json.Unmarshal([]byte(line), &chunk); err != nil {
				// Non-JSON line – skip it
				fmt.Fprintf(os.Stderr, "%sWarn: Skipping line: %s%s\n", Red, line, Reset)
				continue
			}
			newPart := chunk.Message.Content
			messages[curAssistantIdx].Content += newPart
			// os.Stdout is unbuffered in Go, so printing suffices for the
			// text to appear immediately.
			fmt.Print(newPart)
			if chunk.Done {
				break
			}
		}
		// Surface a truncated/aborted stream instead of ignoring it.
		if err := sc.Err(); err != nil {
			fmt.Fprintf(os.Stderr, "%sError: Scanner error: %v%s\n", Red, err, Reset)
		}
		resp.Body.Close()

		// Persist the finished turn once (the original rewrote the whole
		// file after every chunk).
		saveChat(firstMsgTime, messages)

		// End of assistant reply
		fmt.Println()

		// ---- Response time & character count (bytes) ----
		duration := time.Since(startTime)
		charCount := len(messages[curAssistantIdx].Content)
		fmt.Printf("%sResponse time: %.2fs, characters: %d%s\n", Red, duration.Seconds(), charCount, Reset)

		// Separator line (kept from original version)
		fmt.Println()
		fmt.Println("────────────────────────────────────────────────────────────────────")
		fmt.Println()
		prompt()
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "%sError: Scanner error: %v%s\n", Red, err, Reset)
	}
}
// ---------- Helpers ----------
// prompt prints the coloured input prefix for the user ("ECHO") and
// leaves the cursor on the same line, ready for typing.
func prompt() {
	fmt.Print(Purple + "ECHO ❯ " + Reset)
}
func saveChat(t time.Time, msgs []Message) {
if t.IsZero() {
t = time.Now()
}
fileName := filepath.Join("chats", fmt.Sprintf("%s.json", t.Format("2006-01-02_15-04-05")))
f, err := os.Create(fileName)
if err != nil {
fmt.Fprintf(os.Stderr, "%sError: Can't write chat file: %v%s\n", Red, err, Reset)
return
}
defer f.Close()
enc := json.NewEncoder(f)
enc.SetIndent("", " ")
if err := enc.Encode(msgs); err != nil {
fmt.Fprintf(os.Stderr, "%sError: JSON encode error: %v%s\n", Red, err, Reset)
}
}if you have a
If you have a GGUF file and want to use it with Ollama on Windows, create a folder containing only the GGUF file.
Then cd into that folder and run `ollama create` with a custom name for the model in the Ollama app.
output example:
cmd
Microsoft Windows
(c) Microsoft Corporation. All rights reserved.
C:\dev>G:
G:\>cd G:\hunyuan
G:\hunyuan>ollama create hunyuan-mt-chimera-7b
gathering model components
copying file sha256:67e757296ca52807d8e0023e57a845e1be68072776fa1ccca7c4d7a4a423bc91 100%
parsing GGUF
using existing layer sha256:67e757296ca52807d8e0023e57a845e1be68072776fa1ccca7c4d7a4a423bc91
writing manifest
success
G:\hunyuan>